package com.rongji.dfish.misc.chinese;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;

import com.rongji.dfish.base.TrieTree;

/**
 * Converts text to pinyin by looking up characters and words in trie-based
 * dictionaries that are loaded lazily from classpath resources.
 *
 * <p>Each instance owns its dictionaries; they are loaded on first use and
 * published only after loading has finished, so concurrent callers never
 * observe a partially populated dictionary.</p>
 */
public class PinyinConverter {
	/**
	 * Output format: every matched dictionary value is passed through the
	 * tone table ({@code tone_lib.txt}) before being appended.
	 */
	public static final int WITH_TONE_MARK = 2;
	/** Output format: the digits {@code 1}-{@code 5} are removed from every matched dictionary value. */
	public static final int WITHOUT_TONE = 1;
	/** Output format: matched dictionary values are appended exactly as stored. */
	public static final int WITH_TONE_NUMBER = 0;
	/**
	 * Default mode - phonetic order; uses {@code char_chinese_lib.txt}.
	 */
	public static final int MODE_COMMON=0;
	/**
	 * Phonetic order optimized for person names; uses
	 * {@code char_chinese_name_lib.txt} as the single-character dictionary.
	 */
	public static final int MODE_PERSON_NAME=1;

	private static final String RESOURCE_DIR = "/com/rongji/dfish/misc/chinese/";

	private final int mode;
	// volatile: both fields are read outside the lock in the lazy getters.
	private volatile TrieTree<String> mainLib;
	private volatile TrieTree<String> vowel;

	/**
	 * @param mode {@link #MODE_PERSON_NAME} to use the person-name character
	 *             dictionary; any other value behaves like {@link #MODE_COMMON}
	 */
	public PinyinConverter(int mode){
		this.mode=mode;
	}

	/**
	 * Converts {@code str} using the main dictionary.
	 *
	 * @param str          text to convert; {@code null} yields {@code null}
	 * @param separator    appended after every matched token (including the
	 *                     last one); ignored when {@code null} or empty
	 * @param pinyinFormat one of {@link #WITH_TONE_MARK}, {@link #WITHOUT_TONE},
	 *                     {@link #WITH_TONE_NUMBER}; unknown values behave like
	 *                     {@link #WITH_TONE_NUMBER}
	 * @return the converted text; parts of {@code str} that the dictionary
	 *         does not match are copied through unchanged
	 */
	public String convert(String str, String separator, int pinyinFormat) {
		if(str==null){
			return null;
		}
		return convert(getMainLib(), str, separator, pinyinFormat);
	}

	/**
	 * Converts {@code str} with an empty separator and {@link #WITHOUT_TONE}.
	 *
	 * @param str text to convert; {@code null} yields {@code null}
	 * @return the converted text
	 */
	public String convert(String str) {
		return convert(str,"",WITHOUT_TONE);
	}

	/**
	 * Replaces every token found by {@code trieTree} in {@code str} with its
	 * (formatted) dictionary value and copies the unmatched gaps verbatim.
	 */
	private String convert(TrieTree<String> trieTree,String str, String separator, int pinyinFormat) {
		StringBuilder sb=new StringBuilder();
		List<TrieTree.SearchResult<String>> result=trieTree.search(str);
		char[] chs=str.toCharArray();
		boolean hasSeparator=separator!=null&&!separator.equals("");
		int i=0; // index of the first character not yet written to sb
		for(TrieTree.SearchResult<String> token:result){
			if(token.getBegin()>i){
				// Copy the unmatched text that precedes this token.
				sb.append(chs,i,token.getBegin()-i);
			}
			switch(pinyinFormat){
				case WITHOUT_TONE:
					sb.append(stripToneDigits(token.getValue()));
					break;
				case WITH_TONE_MARK:
					// Second pass: run the dictionary value through the tone table.
					sb.append(convert(getVowel(),token.getValue(), null, WITH_TONE_NUMBER));
					break;
				default:
					sb.append(token.getValue());
			}
			i=token.getEnd();
			if(hasSeparator){
				sb.append(separator);
			}
		}
		if(i<chs.length){
			// Copy whatever follows the last token.
			sb.append(chs,i,chs.length-i);
		}
		return sb.toString();
	}

	/** Returns {@code pinyin} with every digit from {@code 1} to {@code 5} removed. */
	private static String stripToneDigits(String pinyin) {
		StringBuilder sb=new StringBuilder(pinyin.length());
		for(int k=0;k<pinyin.length();k++){
			char c=pinyin.charAt(k);
			if(c<'1'||c>'5'){
				sb.append(c);
			}
		}
		return sb.toString();
	}

	/**
	 * Returns the main dictionary, loading it on first use. The tree is built
	 * in a local variable and assigned to the field only once loading is
	 * complete, so other threads never see it half-filled.
	 */
	private TrieTree<String> getMainLib() {
		TrieTree<String> lib=mainLib;
		if(lib==null){
			synchronized (this) {
				lib=mainLib;
				if(lib==null){
					// Reverse order (match from right to left): in Chinese words the later
					// character is often the more meaningful one, so reverse matching
					// generally gives better results.
					lib=new TrieTree<String>(true);
					switch(mode){
					case MODE_PERSON_NAME:
						loadCharLib(lib,RESOURCE_DIR+"char_chinese_name_lib.txt");
						break;
					default:
						loadCharLib(lib,RESOURCE_DIR+"char_chinese_lib.txt");
					}
					loadPairLib(lib,RESOURCE_DIR+"word_lib.txt");// word dictionary
					mainLib=lib;
				}
			}
		}
		return lib;
	}

	/**
	 * Returns the tone table used for {@link #WITH_TONE_MARK}, loading it on
	 * first use with the same publish-after-load discipline as the main dictionary.
	 */
	private TrieTree<String> getVowel() {
		TrieTree<String> lib=vowel;
		if(lib==null){
			synchronized (this) {
				lib=vowel;
				if(lib==null){
					lib=new TrieTree<String>();
					loadPairLib(lib,RESOURCE_DIR+"tone_lib.txt");// vowel (final) table
					vowel=lib;
				}
			}
		}
		return lib;
	}

	/**
	 * Loads {@code key=value} lines: each line adds one entry mapping the text
	 * before the first {@code '='} to the text after it.
	 */
	private void loadPairLib(TrieTree<String> tree, String fileName) {
		loadLib(tree, fileName, false);
	}

	/**
	 * Loads {@code value=chars} lines: every character after the first
	 * {@code '='} becomes a one-character key mapped to the text before it.
	 */
	private void loadCharLib(TrieTree<String> tree, String fileName) {
		loadLib(tree, fileName, true);
	}

	/**
	 * Reads the UTF-8 classpath resource {@code fileName} line by line into
	 * {@code tree}. Lines without {@code '='} or without a second part are
	 * skipped. Loading is best-effort: failures are printed to standard error
	 * and the entries read so far are kept.
	 *
	 * @param perChar {@code true} for the per-character layout handled by
	 *                {@link #loadCharLib}, {@code false} for plain pairs
	 */
	private void loadLib(TrieTree<String> tree, String fileName, boolean perChar) {
		InputStream is=null;
		try{
			is=getClass().getResourceAsStream(fileName);
			if(is==null){
				throw new IllegalStateException("Resource not found: "+fileName);
			}
			BufferedReader reader=new BufferedReader(new InputStreamReader(is,"UTF-8"));
			String line;
			while ((line=reader.readLine())!=null){
				if(line.indexOf('=')<0){
					continue;
				}
				String[] pair=line.split("=");
				if(pair.length<2){
					// e.g. "abc=": split drops trailing empty strings, so there is no
					// second part. Skip this line instead of aborting the whole load.
					continue;
				}
				if(perChar){
					for(char c:pair[1].toCharArray()){
						tree.put(String.valueOf(c), pair[0]);
					}
				}else{
					tree.put(pair[0], pair[1]);
				}
			}
		}catch(Exception ex){
			ex.printStackTrace();
		}finally{
			if(is!=null){
				try{
					is.close();
				}catch(Exception ignored){
					// Nothing useful can be done if closing a read-only resource fails.
				}
			}
		}
	}
}
